%! TEX root = master.tex
\section{蒙特卡洛估计的期望值和误差分析}

对于标准正态分布 $N(0,1)$，考虑统计量 $X = I(x>0)$，即指示函数：

\begin{itemize}
  \item 理论期望值 $\mathbb{E}[X] = P(x>0) = 0.5$
  \item 理论方差 $\mathbb{V}ar(X) = \mathbb{E}[X^2] - \mathbb{E}[X]^2 = 0.5 - 0.25 = 0.25$
  \item 理论标准差（单个样本的均方根误差）$\sigma_X = \sqrt{\mathbb{V}ar(X)} = 0.5$
\end{itemize}

\subsection{蒙特卡洛估计}

使用 $n$ 个独立样本的蒙特卡洛估计：

\begin{enumerate}
  \item 估计期望值：
\[
      \mu = \mathbb{E}[X] \approx \frac{1}{n} \sum_{i=1}^n X_i = \hat{\mu}_{n}
\]

  \item 估计误差：
\[
    \begin{aligned}
    \sigma &= \sqrt{\mathbb{V}ar[\hat{\mu}_{n}]} = \sqrt{\frac{\mathbb{V}ar[X]}{n}} \\
    &= \sqrt{\frac{\mathbb{E}[(X - \mathbb{E}[X])^{2}]}{n}} \\
    &= \sqrt{\frac{\mathbb{E}[X^{2}] - \mathbb{E}[X]^{2}}{n}} \\
    &\approx 
    \frac{1}{\sqrt{n}} \sqrt{
      \left(
    \frac{1}{n} \sum_{i=1}^n{X_i^{2}} 
     - \hat{\mu}_{n}^{2}
    \right)
    }
    = \hat{\sigma}_{n}
    \end{aligned}
\]
其中
\[
    \hat{\sigma}_{n} \approx \sqrt{\frac{0.25}{n}}
\]
因此，估计误差随样本数 $n$ 的增加而减小，比例为 $1/\sqrt{n}$。
\end{enumerate}

\subsection{数值验证}

对于不同样本量 $n$，期望的估计误差如下表：

\begin{center}
\begin{tabular}{|c|c|}
\hline
样本量 $n$ & 期望误差 ($\sigma$) \\
\hline
100        & 0.05         \\
1,000      & 0.0158       \\
10,000     & 0.005        \\
100,000    & 0.00158      \\
\hline
\end{tabular}
\end{center}

测试中使用的样本量为 $10{,}000$，因此期望的估计误差约为 $0.005$。

\section{重要性采样}

\subsection{方法简介}

重要性采样（Importance Sampling）是一种通过引入提议分布（proposal distribution）$q(x)$ 来估计目标分布 $p(x)$ 下统计量期望的方法。其基本思想是：

\begin{itemize}
  \item 从易于采样的 $q(x)$ 采样
  \item 对每个样本赋予权重 $w(x) = p(x)/q(x)$
  \item 期望估计公式为：
    \[
      \begin{aligned}
      \mu &= \mathbb{E}_p[f(x)] \\ 
         &= \int{f(x) p(x)dx}   \\
         &= \int{f(x) \frac{p(x)}{q(x)} q(x)dx} = \mathbb{E}_q[f(x) w(x)] \\
         &\approx \frac{1}{n} \sum_{i=1}^n f(x_i) w(x_i)
      \end{aligned}
    \]
    其中 $x_i \sim q(x)$，权重通常归一化，即
    \[
    1 = \mathbb{E}_p [1] = \mathbb{E}_q [w(x)] \approx \frac{1}{n}\sum_{i=1}^{n}{w(x_i)}
    \]
    因此
    \[
    \hat{\mu} = \frac{\sum_{i=1}^n f(x_i) w(x_i)}{\sum_{i=1}^n w(x_i)}
    \]
\end{itemize}

\subsection{方差}

设 $x_i \sim q(x)$，$w(x_i) = \frac{p(x_i)}{q(x_i)}$，
定义

\[
A = \sum_{i=1}^n f(x_i) w(x_i), \quad B = \sum_{i=1}^n w(x_i)
\]

我们希望估计两个随机变量和的比值
$$
\hat{\mu} = g(A,B) = \frac{A}{B}
$$
的方差 $\sigma^{2} = \mathrm{Var}(\hat{\mu})$。  

利用 delta 法（Delta Method），对函数 $g(A,B) = A/B$ 在 $(\mathbb{E}[A], \mathbb{E}[B])$ 处做一阶泰勒展开：
$$
g(A,B) \approx g(\mathbb{E}[A], \mathbb{E}[B]) + g_A (A - \mathbb{E}[A]) + g_B (B - \mathbb{E}[B])
$$
其中 
$$
g_A = \frac{\partial g}{\partial A} = \frac{1}{\mathbb{E}[B]}, \quad
g_B = \frac{\partial g}{\partial B} = -\frac{\mathbb{E}[A]}{(\mathbb{E}[B])^2}
$$
因此，
$$
\sigma^{2} = \mathrm{Var}[\hat{\mu}] 
\approx 
  g_A^2 \mathrm{Var}[A] 
+ 2g_A g_B \mathrm{Cov}[A,B]
+ g_B^2 \mathrm{Var}[B]
$$
代入 $g_A, g_B$，得
$$
\mathrm{Var}[\hat{\mu}]
\approx 
\frac{1}{(\mathbb{E}[B])^2} \mathrm{Var}[A] 
- \frac{2\mathbb{E}[A]}{(\mathbb{E}[B])^3} \mathrm{Cov}[A,B]
+ \frac{(\mathbb{E}[A])^2}{(\mathbb{E}[B])^4} \mathrm{Var}[B]
$$

由于 $A, B$ 都是 $n$ 个独立样本的和，
$$
\mathrm{Var}[A] = n \mathrm{Var}_q[f(x)w(x)]
= n \left[
\mathbb{E}_{q} [f^{2}(x)w^{2}(x)]
- \left(\mathbb{E}_{q} [f(x)w(x)]\right)^{2}
\right]
$$

$$
\mathrm{Var}[B] = n \mathrm{Var}_q[w(x)]
= n \left[
\mathbb{E}_{q} [w^{2}(x)]
- \left(\mathbb{E}_{q} [w(x)]\right)^{2}
\right]
$$

$$
\mathrm{Cov}[A,B] = \mathbb{E}_{q}[(A - \mathbb{E}_{q}[A]) (B - \mathbb{E}_{q}[B]) ]
= \mathbb{E}_{q}[A B] - \mathbb{E}_{q}[A] \mathbb{E}_{q}[B]
$$
  
$$
\mathbb{E}_{q}[A] = n \mathbb{E}_q[f(x)w(x)]
$$

$$
\mathbb{E}_{q}[B] = n \mathbb{E}_q[w(x)]
$$

\begin{align*}
\mathbb{E}_{q}[A B]
&= \mathbb{E}_{q}\left[
\left(\sum_{i=1}^{n}{f(x_i) w(x_i)}\right)
\left(\sum_{i=1}^{n}{w(x_i)}\right)
\right]
\\
&= \mathbb{E}_{q}\left[
\sum_{i=1}^{n}\sum_{j=1}^{n}{f(x_i) w(x_i)w(x_j)}
\right] \\
&= \sum_{i=1}^{n}\sum_{j=1}^{n}{\mathbb{E}_{q}[f(x_i) w(x_i)w(x_j)]} \\
&= \sum_{i=1}^{n}{\mathbb{E}_{q}[f(x_i) w^2(x_i)]} + 
   \sum_{i\neq j}{\mathbb{E}_{q}[f(x_i) w(x_i)]\mathbb{E}_{q}[w(x_j)]} \\
&= n\mathbb{E}_{q}[f(x) w^2(x)] + n(n-1)\mathbb{E}_{q}[f(x) w(x)]\mathbb{E}_{q}[w(x)]
\end{align*}

\begin{align}
	\mathrm{Cov}[A,B]
	&= \mathbb{E}_{q}[A B] - \mathbb{E}_{q}[A] \mathbb{E}_{q}[B]
	\\
	&= n\mathbb{E}_{q}[f(x) w^2(x)] - n\mathbb{E}_{q}[f(x) w(x)]\mathbb{E}_{q}[w(x)]
\end{align}

方差为
$$
\begin{aligned}
	\sigma^{2}
	&= \mathrm{Var}[\hat{\mu}] \\
	&\approx 
	\frac{n \left[
			\mathbb{E}_{q} [f^{2}(x)w^{2}(x)]
			- \left(\mathbb{E}_{q} [f(x)w(x)]\right)^{2}
		\right]
	}{(n \mathbb{E}_q[w(x)])^2}  
	\\
& - \frac{2n \mathbb{E}_q[f(x)w(x)]}{(n \mathbb{E}_q[w(x)])^3} \left(n\mathbb{E}_{q}[f(x) w^2(x)] - n\mathbb{E}_{q}[f(x) w(x)]\mathbb{E}_{q}[w(x)]\right) 
\\
& + \frac{(n \mathbb{E}_q[f(x)w(x)])^2}{(n \mathbb{E}_q[w(x)])^4} n \left[
	\mathbb{E}_{q} [w^{2}(x)]
	- \left(\mathbb{E}_{q} [w(x)]\right)^{2}
\right]
\\
	&=
	\frac{\mathbb{E}_{q} [f^{2}(x)w^{2}(x)]}{n {(\mathbb{E}_q[w(x)])}^{2}}  
	- \frac{2 \mathbb{E}_{q}[f(x)w(x)]\mathbb{E}_{q}[f(x) w^2(x)]
	}{%
		n{(\mathbb{E}_q[w(x)])}^3
	}
	+ \frac{(\mathbb{E}_q[f(x)w(x)])^2\mathbb{E}_{q} [w^{2}(x)]}{n (\mathbb{E}_q[w(x)])^4} 
\end{aligned}
$$

\subsection{近似}

\begin{align*}
	\mathbb{E}_{q}[f^{2}(x)w^{2}(x)]
&\approx \hat{\mu}_{f^2w^2}
= \frac{1}{n}\sum_{i=1}^n {f^2(x_i)w^2(x_i)}
\\
\mathbb{E}_{q}[f(x) w^2(x)] 
&\approx \hat{\mu}_{fw^2}
= \frac{1}{n}\sum_{i=1}^n f(x_i)w^2(x_i)
\\
\mathbb{E}_{q} [w^{2}(x)]
&\approx \hat{\mu}_{w^2}
= \frac{1}{n}\sum_{i=1}^{n}{w^2(x_i)}
\\
\mathbb{E}_{q}[f(x) w(x)]
&\approx \hat{\mu}_A
= \frac{1}{n}\sum_{i=1}^n f(x_i)w(x_i)
\\
\mathbb{E}_{q}[w(x)]
&\approx \hat{\mu}_B
= \frac{1}{n}\sum_{i=1}^n w(x_i)
\end{align*}

方差的样本估计公式为：

$$
\begin{aligned}
\hat{\sigma}^2
	&=
	\frac{\hat{\mu}_{f^2w^2} }{n {\hat{\mu}_B}^{2}}  
	- \frac{2 \hat{\mu}_{fw^2} \hat{\mu}_{A} }{ n{\hat{\mu}_B}^3 }
+ \frac{{(\hat{\mu}_{A})}^2\hat{\mu}_{w^2}}{n \hat{\mu}_B^4} 
\end{aligned}
$$

标准差的样本估计可以通过方差的平方根得到：

$$
\hat{\sigma} =  \sqrt{
	\frac{1}{n}
	\left(
		\frac{\hat{\mu}_{f^2w^2}}{\hat{\mu}_B^2}
		- \frac{2\hat{\mu}_A \hat{\mu}_{fw^2}}{\hat{\mu}_B^3}
		+ \frac{\hat{\mu}_A^2 \hat{\mu}_{w^2}}{\hat{\mu}_B^4}
	\right)
}
$$


\paragraph{参考文献：}
\begin{itemize}
  \item Owen, A. B. (2013). Monte Carlo theory, methods and examples. Section 9.2.
  \item Bishop, C. M. (2006). Pattern Recognition and Machine Learning. Section 11.1.4.
\end{itemize}
